####################
# Loading rep2 data
####################
ldat_new <- read.loom.matrices("/Volumes/bioinfomatics$/Mehdi/Matthias/scRNA-seq/scRNAseq_CRG_02012020/onefilepercell_HFWL7BBXY_1_IDT-DUI-NXT-66_and_others_0VWWZ.loom")## reading loom file via hdf5r...
ldat_new <- lapply(ldat_new,function(x) {
colnames(x) <- gsub(".bam","",gsub("onefilepercell_HFWL7BBXY_1_IDT-DUI-NXT-66_and_others_0VWWZ:","",colnames(x)))
x
})
meta_data <- read.csv(file="/Volumes/bioinfomatics$/Mehdi/Matthias/scRNA-seq/scRNAseq_CRG_02012020/sample_info.csv", header=TRUE, sep=",")
rownames(meta_data) <- meta_data$sample_ID
meta_data$rep <- 'rep2'
meta_data <- meta_data[,c('rep','sample_label')]
colnames(meta_data) <- c('rep','celltype')
meta_data$celltype <- gsub('TCRint_DP_WT','TCRhiDP',gsub('CD8_SP_TCRpos_WT','CD8SP',gsub("CD4_SP_WT","CD4SP",gsub("CD4_pos_8lo_WT","CD4+8low",gsub("CD69_pos_DP_WT","CD69posDP",gsub("CD69_neg_DP_WT","CD69negDP",meta_data$celltype))))))
meta_data$celltype <- gsub('TCRint_DP_MHCclassII_KO','TCRhiDP_C2KO',gsub('CD8_SP_TCRpos_MHCclassII_KO','CD8SP_C2KO',gsub("CD4_SP_MHCclassII_KO","CD4SP_C2KO",gsub("CD4_pos_8lo_classII_KO","CD4+8low_C2KO",gsub("CD69_pos_DP_MHCclassII_KO","CD69posDP_C2KO",gsub("CD69_neg_DP_MHCclassII_KO","CD69negDP_C2KO",meta_data$celltype))))))
meta_data <- meta_data[!(rownames(meta_data)=="HFWL7BBXY_1_IDT-DUI-NXT-49" | rownames(meta_data)=="HFWL7BBXY_1_IDT-DUI-NXT-333"),]
meta_data[grep('C2KO',meta_data$celltype),]$rep <- 'rep2_KO'
meta_data$celltype <- gsub('_C2KO','', meta_data$celltype)
# exonic read (spliced) expression matrix
emat_rep2 <- ldat_new$spliced
emat_rep2 <- emat_rep2[, colnames(emat_rep2) %in% as.vector(rownames(meta_data))];
# intronic read (unspliced) expression matrix
nmat_rep2 <- ldat_new$unspliced
nmat_rep2 <- nmat_rep2[, colnames(nmat_rep2) %in% as.vector(rownames(meta_data))];
# spanning read (intron+exon) expression matrix
smat_rep2 <- ldat_new$spanning;
smat_rep2 <- smat_rep2[, colnames(smat_rep2) %in% as.vector(rownames(meta_data))];
######################
# Loading rep1 data
######################
ldat <- read.loom.matrices("/Volumes/bioinfomatics$/Mehdi/Matthias/scRNA-seq/onefilepercell_P2221_N707-S505_and_others_96WOS.loom")## reading loom file via hdf5r...
ldat <- lapply(ldat,function(x) {
colnames(x) <- gsub(".bam","",gsub(".*:","",colnames(x)))
x
})
WishBone_tsne <- read.csv(file="/Volumes/bioinfomatics$/Mehdi/Matthias/scRNA-seq/WishBone_tsne_with_highTCR.csv", header=TRUE, sep=",")
colnames(WishBone_tsne) <- c('CellName','x','y','Sample')
colData <- read.csv(file="/Volumes/bioinfomatics$/Mehdi/Matthias/scRNA-seq/colData.csv", header=TRUE, sep=",")
merged_meta1 <- merge(x = WishBone_tsne, y = colData, by = "CellName")
color_dt <- data.frame(Sample.x=c('WT1','WT2','WT3','WT4','WT5','highTCR_Rest'), color=c('#f44e42','#ffac38','#bcff37','#36eeff','#9335ff','#36ffee'))
merged_meta <- merge(x = merged_meta1, y = color_dt, by = "Sample.x")
merged_meta <- merged_meta[,c('CellCode','Sample.x')]
rownames(merged_meta) <- merged_meta$CellCode
merged_meta$rep <- 'rep1'
merged_meta <- merged_meta[,c('rep','Sample.x')]
colnames(merged_meta) <- c('rep','celltype')
merged_meta$celltype <- gsub('highTCR_Rest','TCRhiDP',gsub('WT5','CD8SP',gsub("WT4","CD4SP",gsub("WT3","CD4+8low",gsub("WT2","CD69posDP",gsub("WT1","CD69negDP",merged_meta$celltype))))))
# exonic read (spliced) expression matrix
emat_rep1 <- ldat$spliced
colnames(emat_rep1) <- gsub("-","_",colnames(emat_rep1))
emat_rep1 <- emat_rep1[, colnames(emat_rep1) %in% as.vector(rownames(merged_meta))];
# intronic read (unspliced) expression matrix
nmat_rep1 <- ldat$unspliced
colnames(nmat_rep1) <- gsub("-","_",colnames(nmat_rep1))
nmat_rep1 <- nmat_rep1[, colnames(nmat_rep1) %in% as.vector(rownames(merged_meta))];
# spanning read (intron+exon) expression matrix
smat_rep1 <- ldat$spanning;
colnames(smat_rep1) <- gsub("-","_",colnames(smat_rep1))
smat_rep1 <- smat_rep1[, colnames(smat_rep1) %in% as.vector(rownames(merged_meta))];
############
#merging rep1 & rep2
############
emat <- cbind(emat_rep1,emat_rep2)
nmat <- cbind(nmat_rep1,nmat_rep2)
smat <- cbind(smat_rep1,smat_rep2)
meta_info <- rbind(merged_meta,meta_data)
###########
# Running Seurat
###########
WT_Thymocye <- CreateSeuratObject(emat, meta.data = meta_info)
WT_Thymocye.list <- SplitObject(WT_Thymocye, split.by = "rep")
for (i in 1:length(WT_Thymocye.list)) {
WT_Thymocye.list[[i]] <- NormalizeData(WT_Thymocye.list[[i]], verbose = FALSE)
WT_Thymocye.list[[i]] <- FindVariableFeatures(WT_Thymocye.list[[i]], selection.method = "vst",
nfeatures = 2000, verbose = FALSE)
}
Rep1_log <- t(GetAssayData(WT_Thymocye.list[[1]],slot='data',assay = "RNA"))
Rep2_log <- t(GetAssayData(WT_Thymocye.list[[3]],slot='data',assay = "RNA"))
Rep2KO_log <- t(GetAssayData(WT_Thymocye.list[[2]],slot='data',assay = "RNA"))
meta_info$x1 <- 'Others'
meta_info$x2 <- 'Others'
meta_info$x3 <- 'Others'
meta_info$x4 <- 'Others'
meta_info$x5 <- 'Cd8-'
meta_info$x6 <- 'Cd4-'
meta_info$x7 <- 'Itm2a-'
meta_info$x8 <- 'Stat1-'
meta_info$x9 <- 'Others'
meta_info$x10 <- 'Others'
meta_info$x11 <- 'Others'
meta_info$x12 <- 'Others'
colnames(meta_info) <- c("rep", "celltype", "Cd4+Cd8+","Cd4+Cd8-", "Cd4-Cd8+", "Cd4-Cd8-", 'Cd8', 'Cd4', 'Itm2a', 'Stat1', "Zbtb7b+Runx3+","Zbtb7b+Runx3-", "Zbtb7b-Runx3+", "Zbtb7b-Runx3-")
meta_info[rownames(Rep1_log[Rep1_log[,'Zbtb7b'] > 0.1 & Rep1_log[,'Runx3'] > 0.1,]),'Zbtb7b+Runx3+'] <- 'Zbtb7b+Runx3+'
meta_info[rownames(Rep2_log[Rep2_log[,'Zbtb7b'] > 0.1 & Rep2_log[,'Runx3'] > 0.1,]),'Zbtb7b+Runx3+'] <- 'Zbtb7b+Runx3+'
meta_info[rownames(Rep2KO_log[Rep2KO_log[,'Zbtb7b'] > 0.1 & Rep2KO_log[,'Runx3'] > 0.1,]),'Zbtb7b+Runx3+'] <- 'Zbtb7b+Runx3+'
meta_info[rownames(Rep1_log[Rep1_log[,'Zbtb7b'] > 0.1 & Rep1_log[,'Runx3'] < 0.1,]),'Zbtb7b+Runx3-'] <- 'Zbtb7b+Runx3-'
meta_info[rownames(Rep2_log[Rep2_log[,'Zbtb7b'] > 0.1 & Rep2_log[,'Runx3'] < 0.1,]),'Zbtb7b+Runx3-'] <- 'Zbtb7b+Runx3-'
meta_info[rownames(Rep2KO_log[Rep2KO_log[,'Zbtb7b'] > 0.1 & Rep2KO_log[,'Runx3'] < 0.1,]),'Zbtb7b+Runx3-'] <- 'Zbtb7b+Runx3-'
meta_info[rownames(Rep1_log[Rep1_log[,'Zbtb7b'] < 0.1 & Rep1_log[,'Runx3'] > 0.1,]),'Zbtb7b-Runx3+'] <- 'Zbtb7b-Runx3+'
meta_info[rownames(Rep2_log[Rep2_log[,'Zbtb7b'] < 0.1 & Rep2_log[,'Runx3'] > 0.1,]),'Zbtb7b-Runx3+'] <- 'Zbtb7b-Runx3+'
meta_info[rownames(Rep2KO_log[Rep2KO_log[,'Zbtb7b'] < 0.1 & Rep2KO_log[,'Runx3'] > 0.1,]),'Zbtb7b-Runx3+'] <- 'Zbtb7b-Runx3+'
meta_info[rownames(Rep1_log[Rep1_log[,'Zbtb7b'] < 0.1 & Rep1_log[,'Runx3'] < 0.1,]),'Zbtb7b-Runx3-'] <- 'Zbtb7b-Runx3-'
meta_info[rownames(Rep2_log[Rep2_log[,'Zbtb7b'] < 0.1 & Rep2_log[,'Runx3'] < 0.1,]),'Zbtb7b-Runx3-'] <- 'Zbtb7b-Runx3-'
meta_info[rownames(Rep2KO_log[Rep2KO_log[,'Zbtb7b'] < 0.1 & Rep2KO_log[,'Runx3'] < 0.1,]),'Zbtb7b-Runx3-'] <- 'Zbtb7b-Runx3-'
meta_info[rownames(Rep1_log[Rep1_log[,'Cd4'] > 0.1 & Rep1_log[,'Cd8a'] > 0.1,]),'Cd4+Cd8+'] <- 'Cd4+Cd8+'
meta_info[rownames(Rep2_log[Rep2_log[,'Cd4'] > 0.1 & Rep2_log[,'Cd8a'] > 0.1,]),'Cd4+Cd8+'] <- 'Cd4+Cd8+'
meta_info[rownames(Rep2KO_log[Rep2KO_log[,'Cd4'] > 0.1 & Rep2KO_log[,'Cd8a'] > 0.1,]),'Cd4+Cd8+'] <- 'Cd4+Cd8+'
meta_info[rownames(Rep1_log[Rep1_log[,'Cd4'] > 0.1 & Rep1_log[,'Cd8a'] < 0.1,]),'Cd4+Cd8-'] <- 'Cd4+Cd8-'
meta_info[rownames(Rep2_log[Rep2_log[,'Cd4'] > 0.1 & Rep2_log[,'Cd8a'] < 0.1,]),'Cd4+Cd8-'] <- 'Cd4+Cd8-'
meta_info[rownames(Rep2KO_log[Rep2KO_log[,'Cd4'] > 0.1 & Rep2KO_log[,'Cd8a'] < 0.1,]),'Cd4+Cd8-'] <- 'Cd4+Cd8-'
meta_info[rownames(Rep1_log[Rep1_log[,'Cd4'] < 0.1 & Rep1_log[,'Cd8a'] > 0.1,]),'Cd4-Cd8+'] <- 'Cd4-Cd8+'
meta_info[rownames(Rep2_log[Rep2_log[,'Cd4'] < 0.1 & Rep2_log[,'Cd8a'] > 0.1,]),'Cd4-Cd8+'] <- 'Cd4-Cd8+'
meta_info[rownames(Rep2KO_log[Rep2KO_log[,'Cd4'] < 0.1 & Rep2KO_log[,'Cd8a'] > 0.1,]),'Cd4-Cd8+'] <- 'Cd4-Cd8+'
meta_info[rownames(Rep1_log[Rep1_log[,'Cd4'] < 0.1 & Rep1_log[,'Cd8a'] < 0.1,]),'Cd4-Cd8-'] <- 'Cd4-Cd8-'
meta_info[rownames(Rep2_log[Rep2_log[,'Cd4'] < 0.1 & Rep2_log[,'Cd8a'] < 0.1,]),'Cd4-Cd8-'] <- 'Cd4-Cd8-'
meta_info[rownames(Rep2KO_log[Rep2KO_log[,'Cd4'] < 0.1 & Rep2KO_log[,'Cd8a'] < 0.1,]),'Cd4-Cd8-'] <- 'Cd4-Cd8-'
meta_info[rownames(Rep1_log[Rep1_log[,'Cd8a'] > 0.1,]),'Cd8'] <- 'Cd8+'
meta_info[rownames(Rep2_log[Rep2_log[,'Cd8a'] > 0.1,]),'Cd8'] <- 'Cd8+'
meta_info[rownames(Rep2KO_log[Rep2KO_log[,'Cd8a'] > 0.1,]),'Cd8'] <- 'Cd8+'
meta_info[rownames(Rep1_log[Rep1_log[,'Cd4'] > 0.1,]),'Cd4'] <- 'Cd4+'
meta_info[rownames(Rep2_log[Rep2_log[,'Cd4'] > 0.1,]),'Cd4'] <- 'Cd4+'
meta_info[rownames(Rep2KO_log[Rep2KO_log[,'Cd4'] > 0.1,]),'Cd4'] <- 'Cd4+'
meta_info[rownames(Rep1_log[Rep1_log[,'Itm2a'] > 0.5,]),'Itm2a'] <- 'Itm2a+'
meta_info[rownames(Rep2_log[Rep2_log[,'Itm2a'] > 0.5,]),'Itm2a'] <- 'Itm2a+'
meta_info[rownames(Rep2KO_log[Rep2KO_log[,'Itm2a'] > 0.1,]),'Itm2a'] <- 'Itm2a+'
meta_info[rownames(Rep1_log[Rep1_log[,'Stat1'] > 0.2,]),'Stat1'] <- 'Stat1+'
meta_info[rownames(Rep2_log[Rep2_log[,'Stat1'] > 0.2,]),'Stat1'] <- 'Stat1+'
meta_info[rownames(Rep2KO_log[Rep2KO_log[,'Stat1'] > 0.1,]),'Stat1'] <- 'Stat1+'
meta_info[meta_info$celltype=='CD4SP' & meta_info$Itm2a=='Itm2a+' & meta_info$Stat1=='Stat1-', 'celltype'] <- 'CD4SP_Immature'
meta_info[meta_info$celltype=='CD4SP' & (meta_info$Itm2a=='Itm2a-' | meta_info$Stat1=='Stat1+'), 'celltype'] <- 'CD4SP_Mature'
WT_Thymocye <- CreateSeuratObject(emat, meta.data = meta_info)
WT_Thymocye.list <- SplitObject(WT_Thymocye, split.by = "rep")
for (i in 1:length(WT_Thymocye.list)) {
WT_Thymocye.list[[i]] <- NormalizeData(WT_Thymocye.list[[i]], verbose = FALSE)
WT_Thymocye.list[[i]] <- FindVariableFeatures(WT_Thymocye.list[[i]], selection.method = "vst",
nfeatures = 2000, verbose = FALSE)
}
meta_info <- WT_Thymocye@meta.data
reference.list <- WT_Thymocye.list[c("rep1", "rep2","rep2_KO")]
WT_Thymocye.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
WT_Thymocye.integrated <- IntegrateData(anchorset = WT_Thymocye.anchors, dims = 1:30)
DefaultAssay(WT_Thymocye.integrated) <- "integrated"
# Run the standard workflow for visualization and clustering
WT_Thymocye.integrated <- ScaleData(WT_Thymocye.integrated, verbose = FALSE)
WT_Thymocye.integrated <- RunPCA(WT_Thymocye.integrated, npcs = 30, verbose = FALSE)
WT_Thymocye.integrated <- RunUMAP(WT_Thymocye.integrated, reduction = "pca", dims = 1:30)
gene_set <- read.csv(file="/Volumes/bioinfomatics$/Mehdi/Matthias/scRNA-seq/scRNAseq_CRG_02012020/gene_set/DE_gene_drodeRes_rep2_fset.csv", header=FALSE, sep=",")
emat <- cbind(emat_rep1,emat_rep2)
nmat <- cbind(nmat_rep1,nmat_rep2)
smat <- cbind(smat_rep1,smat_rep2)
# meta_info <- rbind(merged_meta,meta_data)
emat <- emat[intersect(gene_set$V1,rownames(emat)),]
emat <- emat[!(rownames(emat) =='Cd4' | rownames(emat) == 'Cd8a' | rownames(emat) =='Cd8b1'),]
###########
#Running Seurat
###########
WT_Thymocye <- CreateSeuratObject(emat, meta.data = meta_info)
WT_Thymocye.list <- SplitObject(WT_Thymocye, split.by = "rep")
for (i in 1:length(WT_Thymocye.list)) {
WT_Thymocye.list[[i]] <- NormalizeData(WT_Thymocye.list[[i]], verbose = FALSE)
WT_Thymocye.list[[i]] <- FindVariableFeatures(WT_Thymocye.list[[i]], selection.method = "vst",
nfeatures = 2000, verbose = FALSE)
}
reference.list <- WT_Thymocye.list[c("rep1", "rep2","rep2_KO")]
WT_Thymocye.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
WT_Thymocye.integrated <- IntegrateData(anchorset = WT_Thymocye.anchors, dims = 1:30)
# switch to integrated assay. The variable features of this assay are automatically
# set during IntegrateData
DefaultAssay(WT_Thymocye.integrated) <- "integrated"
# Run the standard workflow for visualization and clustering
WT_Thymocye.integrated <- ScaleData(WT_Thymocye.integrated, verbose = FALSE)
WT_Thymocye.integrated <- RunPCA(WT_Thymocye.integrated, npcs = 30, verbose = FALSE)
WT_Thymocye.integrated <- RunUMAP(WT_Thymocye.integrated, reduction = "pca", dims = 1:30)
# Filtering cells with too many or too few cells
Rep1_filtered_rep2 <- subset(WT_Thymocye.integrated, cells = rownames(meta_info[meta_info$rep=='rep1' |
(meta_info$rep=='rep2_KO' & meta_info$celltype=='CD69posDP' & meta_info$nCount_RNA < 1500000 & meta_info$nCount_RNA > 750000) |
(meta_info$rep=='rep2_KO' & meta_info$celltype!='CD69posDP' & meta_info$nCount_RNA < 1500000 & meta_info$nCount_RNA > 500000) |
(meta_info$rep=='rep2' & meta_info$celltype=='CD69posDP' & meta_info$nCount_RNA < 1500000 & meta_info$nCount_RNA > 750000) |
(meta_info$rep=='rep2' & meta_info$celltype!='CD69posDP' & meta_info$nCount_RNA < 1500000 & meta_info$nCount_RNA > 500000)
,]))
# Dividing CD4SP to CD4SP Immature and Mature
meta_info_new <- Rep1_filtered_rep2@meta.data
PCA <- as.data.frame(Rep1_filtered_rep2@reductions$pca@cell.embeddings)
rownames(PCA) <- rownames(Rep1_filtered_rep2@reductions$pca@cell.embeddings)
meta_info_new$PC_1 <- PCA$PC_1
meta_info_new$PC_2 <- PCA$PC_2
meta_info_new$PC_3 <- PCA$PC_3
meta_info_new[meta_info_new$celltype=='CD4+8low' & meta_info_new$PC_2 < meta_info_new$PC_1,'celltype'] <- 'CD4SP_Immature'
meta_info[rownames(meta_info_new),'celltype'] <- meta_info_new$celltypegene_set <- read.csv(file="/Volumes/bioinfomatics$/Mehdi/Matthias/scRNA-seq/scRNAseq_CRG_02012020/gene_set/DE_gene_drodeRes_rep2_fset.csv", header=FALSE, sep=",")
emat <- cbind(emat_rep1,emat_rep2)
nmat <- cbind(nmat_rep1,nmat_rep2)
smat <- cbind(smat_rep1,smat_rep2)
# meta_info <- rbind(merged_meta,meta_data)
emat <- emat[intersect(gene_set$V1,rownames(emat)),]
emat <- emat[!(rownames(emat) =='Cd4' | rownames(emat) == 'Cd8a' | rownames(emat) =='Cd8b1'),]
# gene_set <- gene_set[!(gene_set$V1 =='Cd4' | gene_set$V1 =='Cd8a' | gene_set$V1 =='Cd8b1'),]
###########
#Running Seurat
###########
WT_Thymocye <- CreateSeuratObject(emat, meta.data = meta_info)
WT_Thymocye.list <- SplitObject(WT_Thymocye, split.by = "rep")
for (i in 1:length(WT_Thymocye.list)) {
WT_Thymocye.list[[i]] <- NormalizeData(WT_Thymocye.list[[i]], verbose = FALSE)
WT_Thymocye.list[[i]] <- FindVariableFeatures(WT_Thymocye.list[[i]], selection.method = "vst",
nfeatures = 2000, verbose = FALSE)
}
reference.list <- WT_Thymocye.list[c("rep1", "rep2", "rep2_KO")]
WT_Thymocye.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
WT_Thymocye.integrated <- IntegrateData(anchorset = WT_Thymocye.anchors, dims = 1:30)
library(ggplot2)
library(cowplot)
# switch to integrated assay. The variable features of this assay are automatically
# set during IntegrateData
DefaultAssay(WT_Thymocye.integrated) <- "integrated"
# Run the standard workflow for visualization and clustering
WT_Thymocye.integrated <- ScaleData(WT_Thymocye.integrated, verbose = FALSE)
WT_Thymocye.integrated <- RunPCA(WT_Thymocye.integrated, npcs = 30, verbose = FALSE)
WT_Thymocye.integrated <- RunUMAP(WT_Thymocye.integrated, reduction = "pca", dims = 1:30)
PCA <- as.data.frame(WT_Thymocye.integrated@reductions$pca@cell.embeddings)
# Original runing
emat <- cbind(emat_rep1,emat_rep2)
nmat <- cbind(nmat_rep1,nmat_rep2)
smat <- cbind(smat_rep1,smat_rep2)
###########
#Running Seurat
###########
WT_Thymocye <- CreateSeuratObject(emat, meta.data = meta_info)
WT_Thymocye.list <- SplitObject(WT_Thymocye, split.by = "rep")
for (i in 1:length(WT_Thymocye.list)) {
WT_Thymocye.list[[i]] <- NormalizeData(WT_Thymocye.list[[i]], verbose = FALSE)
WT_Thymocye.list[[i]] <- FindVariableFeatures(WT_Thymocye.list[[i]], selection.method = "vst",
nfeatures = 5000, verbose = FALSE)
}
reference.list <- WT_Thymocye.list[c("rep1", "rep2", "rep2_KO")]
WT_Thymocye.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
WT_Thymocye.integrated <- IntegrateData(anchorset = WT_Thymocye.anchors, dims = 1:30)
library(ggplot2)
library(cowplot)
# switch to integrated assay. The variable features of this assay are automatically
# set during IntegrateData
DefaultAssay(WT_Thymocye.integrated) <- "integrated"
WT_Thymocye.integrated <- ScaleData(WT_Thymocye.integrated, verbose = FALSE)
WT_Thymocye.integrated <- RunPCA(WT_Thymocye.integrated, npcs = 30, verbose = FALSE)
WT_Thymocye.integrated <- RunUMAP(WT_Thymocye.integrated, reduction = "pca", dims = 1:30)
WT_Thymocye.integrated@reductions$pca@cell.embeddings <- as.matrix(PCA)
draw_Plots <- function(data_scatter_new, meta_data_f, gene_name){
# data_scatter_new <- data_scatter
# meta_data_f <- meta_data_new
# gene_name <- "H2-K1"
meta_data_f <- cbind(meta_data_f, as.data.frame(data_scatter_new[,gene_name]))
meta_data_f <- meta_data_f[c(-8,-9)]
colnames(meta_data_f)[13] <- gene_name
meta_data_f$celltype <- factor(meta_data_f$celltype, levels=c("CD8SP", "CD4SP_Mature", "Sel_Intermed_Cd4-Cd8+", "Sel_Intermed_Cd4+Cd8-", "Sel_Intermed_Cd4+Cd8+", "CD69negDP"))
# P1 <- ggplot(meta_data_f, aes(x = meta_data_f[,14], y = celltype, color=celltype))+geom_boxplot()+
# geom_jitter(shape=16, position=position_jitter(0.2))+
# theme_classic()+xlab(paste0(gene_name," Expression"))+
# scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
# theme(text = element_text(size = 18, face = "bold"))+
# theme(axis.text = element_text(size = 18, face = "bold"))
if(gene_name=="H2-K1"){
colnames(meta_data_f)[13] <- "H2K1"
gene_name <- "H2K1"}
if(gene_name=="H2-D1"){
colnames(meta_data_f)[13] <- "H2D1"
gene_name <- "H2D1"}
# P1 <- ggerrorplot(meta_data_f, x = "celltype", y = gene_name,
# desc_stat = "mean_sd", color = "celltype",
# add = "jitter", orientation = "horizontal", size = 1)+
# theme_classic()+ylab(paste0(gene_name," Expression"))+
# scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
# theme(text = element_text(size = 18, face = "bold"))+
# theme(axis.text = element_text(size = 18, face = "bold"))
# P1 <- ggerrorplot(meta_data_f, x = "celltype", y = gene_name,
# desc_stat = "mean_sd", color = "celltype",
# add = "jitter", orientation = "horizontal", size = 1)+
# theme_classic()+ylab(paste0(gene_name," Expression"))+
# scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
# theme(text = element_text(size = 18, face = "bold"))+
# theme(axis.text = element_text(size = 18, face = "bold"))
P1 <- ggbarplot(meta_data_f, x = "celltype", y = gene_name,
add = c("mean_se", "jitter"),
color = "celltype", palette = "jco",
position = position_dodge(0.8), orientation = "horizontal")+
theme_classic()+ylab(paste0(gene_name," Expression"))+
scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
theme(text = element_text(size = 16, face = "bold"))+
theme(axis.text = element_text(size = 16, face = "bold"))+
theme(plot.title = element_text(size = 16,face = "bold"))+
stat_compare_means(label = "p.format", method = "wilcox.test",
ref.group = "CD4SP_Mature")+ggtitle(paste0(gene_name, ' where CD4SP_Mature used as refrence for statistic')) # Pairwise comparison against all
P2 <- ggbarplot(meta_data_f, x = "celltype", y = gene_name,
add = c("mean_se", "jitter"),
color = "celltype", palette = "jco",
position = position_dodge(0.8), orientation = "horizontal")+
theme_classic()+ylab(paste0(gene_name," Expression"))+
scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
theme(text = element_text(size = 16, face = "bold"))+
theme(axis.text = element_text(size = 16, face = "bold"))+
theme(plot.title = element_text(size = 16,face = "bold"))+
stat_compare_means(label = "p.signif", method = "wilcox.test",
ref.group = "CD4SP_Mature")+ggtitle(paste0(gene_name, ' where CD4SP_Mature used as refrence for statistic')) # Pairwise comparison against all
P3 <- ggbarplot(meta_data_f, x = "celltype", y = gene_name,
add = c("mean_se", "jitter"),
color = "celltype", palette = "jco",
position = position_dodge(0.8), orientation = "horizontal")+
theme_classic()+ylab(paste0(gene_name," Expression"))+
scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
theme(text = element_text(size = 16, face = "bold"))+
theme(axis.text = element_text(size = 16, face = "bold"))+
theme(plot.title = element_text(size = 16,face = "bold"))+
stat_compare_means(label = "p.format", method = "wilcox.test",
ref.group = "CD8SP")+ggtitle(paste0(gene_name, ' where CD8SP used as refrence for statistic')) # Pairwise comparison against all
P4 <- ggbarplot(meta_data_f, x = "celltype", y = gene_name,
add = c("mean_se", "jitter"),
color = "celltype", palette = "jco",
position = position_dodge(0.8), orientation = "horizontal")+
theme_classic()+ylab(paste0(gene_name," Expression"))+
scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
theme(text = element_text(size = 16, face = "bold"))+
theme(axis.text = element_text(size = 16, face = "bold"))+
theme(plot.title = element_text(size = 16,face = "bold"))+
stat_compare_means(label = "p.signif", method = "wilcox.test",
ref.group = "CD8SP")+ggtitle(paste0(gene_name, ' where CD8SP used as refrence for statistic')) # Pairwise comparison against all
P5 <- ggbarplot(meta_data_f, x = "celltype", y = gene_name,
add = c("mean_se", "jitter"),
color = "celltype", palette = "jco",
position = position_dodge(0.8), orientation = "horizontal")+
theme_classic()+ylab(paste0(gene_name," Expression"))+
scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
theme(text = element_text(size = 16, face = "bold"))+
theme(axis.text = element_text(size = 16, face = "bold"))+
theme(plot.title = element_text(size = 16,face = "bold"))+
stat_compare_means(label = "p.format", method = "wilcox.test",
ref.group = "CD69negDP")+ggtitle(paste0(gene_name, ' where CD69negDP used as refrence for statistic')) # Pairwise comparison against all
P6 <- ggbarplot(meta_data_f, x = "celltype", y = gene_name,
add = c("mean_se", "jitter"),
color = "celltype", palette = "jco",
position = position_dodge(0.8), orientation = "horizontal")+
theme_classic()+ylab(paste0(gene_name," Expression"))+
scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
theme(text = element_text(size = 16, face = "bold"))+
theme(axis.text = element_text(size = 16, face = "bold"))+
theme(plot.title = element_text(size = 16,face = "bold"))+
stat_compare_means(label = "p.signif", method = "wilcox.test",
ref.group = "CD69negDP")+ggtitle(paste0(gene_name, ' where CD69negDP used as refrence for statistic')) # Pairwise comparison against all
P7 <- ggbarplot(meta_data_f, x = "celltype", y = gene_name,
add = c("mean_se", "jitter"),
color = "celltype", palette = "jco",
position = position_dodge(0.8), orientation = "horizontal")+
theme_classic()+ylab(paste0(gene_name," Expression"))+
scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
theme(text = element_text(size = 16, face = "bold"))+
theme(axis.text = element_text(size = 16, face = "bold"))+
theme(plot.title = element_text(size = 16,face = "bold"))+
stat_compare_means(label = "p.format", method = "wilcox.test",
ref.group = "Sel_Intermed_Cd4+Cd8-")+ggtitle(paste0(gene_name, ' where Sel_Intermed_Cd4+Cd8- used as refrence for statistic')) # Pairwise comparison against all
P8 <- ggbarplot(meta_data_f, x = "celltype", y = gene_name,
add = c("mean_se", "jitter"),
color = "celltype", palette = "jco",
position = position_dodge(0.8), orientation = "horizontal")+
theme_classic()+ylab(paste0(gene_name," Expression"))+
scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
theme(text = element_text(size = 16, face = "bold"))+
theme(axis.text = element_text(size = 16, face = "bold"))+
theme(plot.title = element_text(size = 16,face = "bold"))+
stat_compare_means(label = "p.signif", method = "wilcox.test",
ref.group = "Sel_Intermed_Cd4+Cd8-")+ggtitle(paste0(gene_name, ' where Sel_Intermed_Cd4+Cd8- used as refrence for statistic')) # Pairwise comparison against all
P9 <- ggbarplot(meta_data_f, x = "celltype", y = gene_name,
add = c("mean_se", "jitter"),
color = "celltype", palette = "jco",
position = position_dodge(0.8), orientation = "horizontal")+
theme_classic()+ylab(paste0(gene_name," Expression"))+
scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
theme(text = element_text(size = 16, face = "bold"))+
theme(axis.text = element_text(size = 16, face = "bold"))+
theme(plot.title = element_text(size = 16,face = "bold"))+
stat_compare_means(label = "p.format", method = "wilcox.test",
ref.group = "Sel_Intermed_Cd4+Cd8+")+ggtitle(paste0(gene_name, ' where Sel_Intermed_Cd4+Cd8+ used as refrence for statistic')) # Pairwise comparison against all
P10 <- ggbarplot(meta_data_f, x = "celltype", y = gene_name,
add = c("mean_se", "jitter"),
color = "celltype", palette = "jco",
position = position_dodge(0.8), orientation = "horizontal")+
theme_classic()+ylab(paste0(gene_name," Expression"))+
scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
theme(text = element_text(size = 16, face = "bold"))+
theme(axis.text = element_text(size = 16, face = "bold"))+
theme(plot.title = element_text(size = 16,face = "bold"))+
stat_compare_means(label = "p.signif", method = "wilcox.test",
ref.group = "Sel_Intermed_Cd4+Cd8+")+ggtitle(paste0(gene_name, ' where Sel_Intermed_Cd4+Cd8+ used as refrence for statistic')) # Pairwise comparison against all
P11 <- ggbarplot(meta_data_f, x = "celltype", y = gene_name,
add = c("mean_se", "jitter"),
color = "celltype", palette = "jco",
position = position_dodge(0.8), orientation = "horizontal")+
theme_classic()+ylab(paste0(gene_name," Expression"))+
scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
theme(text = element_text(size = 16, face = "bold"))+
theme(axis.text = element_text(size = 16, face = "bold"))+
theme(plot.title = element_text(size = 16,face = "bold"))+
stat_compare_means(label = "p.format", method = "wilcox.test",
ref.group = "Sel_Intermed_Cd4-Cd8+")+ggtitle(paste0(gene_name, ' where Sel_Intermed_Cd4-Cd8+ used as refrence for statistic')) # Pairwise comparison against all
P12 <- ggbarplot(meta_data_f, x = "celltype", y = gene_name,
add = c("mean_se", "jitter"),
color = "celltype", palette = "jco",
position = position_dodge(0.8), orientation = "horizontal")+
theme_classic()+ylab(paste0(gene_name," Expression"))+
scale_color_manual(values = c("blue", "green", "red", "darkturquoise", "black", "darkgray"))+
theme(text = element_text(size = 16, face = "bold"))+
theme(axis.text = element_text(size = 16, face = "bold"))+
theme(plot.title = element_text(size = 16,face = "bold"))+
stat_compare_means(label = "p.signif", method = "wilcox.test",
ref.group = "Sel_Intermed_Cd4-Cd8+")+ggtitle(paste0(gene_name, ' where Sel_Intermed_Cd4-Cd8+ used as refrence for statistic')) # Pairwise comparison against all
# print(P1)
# print(P2)
# print(P3)
# print(P4)
# print(P5)
# print(P6)
print(P7)
# print(P8)
# print(P9)
# print(P10)
print(P11)
# print(P12)
}
####
Rep1_filtered_rep2 <- subset(WT_Thymocye.integrated, cells = rownames(meta_info[meta_info$celltype != "CD4SP_Immature" &
((meta_info$rep=='rep2' & meta_info$celltype=='CD69posDP' & meta_info$nCount_RNA < 1500000 & meta_info$nCount_RNA > 750000) |
(meta_info$rep=='rep2' & meta_info$celltype!='CD69posDP' & meta_info$nCount_RNA < 1500000 & meta_info$nCount_RNA > 500000))
,]))
Rep1_filtered_rep2 <- subset(Rep1_filtered_rep2, cells = rownames(meta_info[
!((meta_info$celltype=="CD69posDP" | meta_info$celltype=="TCRhiDP" | meta_info$celltype=="CD4+8low") & meta_info$Cd4=="Cd4-" & meta_info$Cd8=="Cd8-")
,]))
Rep1_filtered_rep2@meta.data[((Rep1_filtered_rep2@meta.data$celltype=="CD69posDP" | Rep1_filtered_rep2@meta.data$celltype=="TCRhiDP" | Rep1_filtered_rep2@meta.data$celltype=="CD4+8low") & Rep1_filtered_rep2@meta.data$Cd4=="Cd4+" & Rep1_filtered_rep2@meta.data$Cd8=="Cd8+"), 'celltype'] <- "Sel_Intermed_Cd4+Cd8+"
Rep1_filtered_rep2@meta.data[((Rep1_filtered_rep2@meta.data$celltype=="CD69posDP" | Rep1_filtered_rep2@meta.data$celltype=="TCRhiDP" | Rep1_filtered_rep2@meta.data$celltype=="CD4+8low") & Rep1_filtered_rep2@meta.data$Cd4=="Cd4+" & Rep1_filtered_rep2@meta.data$Cd8=="Cd8-"), 'celltype'] <- "Sel_Intermed_Cd4+Cd8-"
Rep1_filtered_rep2@meta.data[((Rep1_filtered_rep2@meta.data$celltype=="CD69posDP" | Rep1_filtered_rep2@meta.data$celltype=="TCRhiDP" | Rep1_filtered_rep2@meta.data$celltype=="CD4+8low") & Rep1_filtered_rep2@meta.data$Cd4=="Cd4-" & Rep1_filtered_rep2@meta.data$Cd8=="Cd8+"), 'celltype'] <- "Sel_Intermed_Cd4-Cd8+"
DefaultAssay(Rep1_filtered_rep2) <- "RNA"
data_scatter <- as.data.frame(t(GetAssayData(Rep1_filtered_rep2,slot='data',assay = "RNA")))
meta_data_new <- Rep1_filtered_rep2@meta.data
meta_data_new$PC_1 <- 0
meta_data_new$PC_1 <- Rep1_filtered_rep2@reductions$pca@cell.embeddings[rownames(meta_data_new),'PC_1']
meta_data_new$Cd4Cd8 <- "NA"
meta_data_new$Cd4Cd8 <- paste0(meta_data_new$Cd4, meta_data_new$Cd8)
meta_data_new$PC1_range <- "-10_to_-5"
meta_data_new[meta_data_new$PC_1 > -5 & meta_data_new$PC_1 < -2.5, "PC1_range" ] <- "-5_to_-2.5"
meta_data_new[meta_data_new$PC_1 > -2.5 & meta_data_new$PC_1 < 0, "PC1_range" ] <- "-2.5_to_0"
meta_data_new[meta_data_new$PC_1 > 0 & meta_data_new$PC_1 < 2.5, "PC1_range" ] <- "0_to_2.5"
meta_data_new[meta_data_new$PC_1 > 2.5 & meta_data_new$PC_1 < 7.5, "PC1_range" ] <- "2.5_to_7.5"
draw_Plots(data_scatter, meta_data_new, 'Zbtb7b')draw_Plots(data_scatter, meta_data_new, 'Cd40lg')draw_Plots(data_scatter, meta_data_new, 'Runx3')draw_Plots(data_scatter, meta_data_new, 'Nkg7')draw_Plots(data_scatter, meta_data_new, 'Cd69')draw_Plots(data_scatter, meta_data_new, 'Nr4a1')draw_Plots(data_scatter, meta_data_new, 'Il7r')draw_Plots(data_scatter, meta_data_new, 'Socs1')draw_Plots(data_scatter, meta_data_new, 'Cd40lg')draw_Plots(data_scatter, meta_data_new, 'Prf1')draw_Plots(data_scatter, meta_data_new, "H2-K1")draw_Plots(data_scatter, meta_data_new, 'B2m')sessionInfo()## R version 3.6.3 (2020-02-29)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
##
## attached base packages:
## [1] grid splines parallel stats4 stats graphics grDevices
## [8] utils datasets methods base
##
## other attached packages:
## [1] kableExtra_1.1.0 gridExtra_2.3
## [3] cowplot_1.0.0 tiff_0.1-5
## [5] jpeg_0.1-8.1 VennDiagram_1.6.20
## [7] futile.logger_1.4.3 metaMA_3.1.2
## [9] SCORPIUS_1.0.6 dynamicTreeCut_1.63-1
## [11] cluster_2.1.0 org.Mm.eg.db_3.10.0
## [13] AnnotationDbi_1.48.0 knitr_1.28
## [15] scran_1.14.6 scater_1.14.6
## [17] RColorBrewer_1.1-2 Seurat_3.1.4
## [19] DT_0.12 slingshot_1.4.0
## [21] princurve_2.1.4 splatter_1.10.1
## [23] SingleCellExperiment_1.8.0 gam_1.16.1
## [25] foreach_1.4.8 ggpubr_0.2.5
## [27] magrittr_1.5 DESeq2_1.26.0
## [29] SummarizedExperiment_1.16.1 DelayedArray_0.12.2
## [31] BiocParallel_1.20.1 matrixStats_0.56.0
## [33] Biobase_2.46.0 GenomicRanges_1.38.0
## [35] GenomeInfoDb_1.22.0 IRanges_2.20.2
## [37] S4Vectors_0.24.3 BiocGenerics_0.32.0
## [39] velocyto.R_0.6 Matrix_1.2-18
## [41] forcats_0.5.0 stringr_1.4.0
## [43] dplyr_0.8.5 purrr_0.3.3
## [45] readr_1.3.1 tidyr_1.0.2
## [47] tibble_2.1.3 ggplot2_3.3.0
## [49] tidyverse_1.3.0 dyno_0.1.2
## [51] dynwrap_1.2.0.9000 dynplot_1.0.2.9000
## [53] dynmethods_1.0.5 dynguidelines_1.0.0
## [55] dynfeature_1.0.0.9000
##
## loaded via a namespace (and not attached):
## [1] rsvd_1.0.3 Hmisc_4.3-1 ica_1.0-2
## [4] ps_1.3.2 lmtest_0.9-37 crayon_1.3.4
## [7] MASS_7.3-51.5 nlme_3.1-144 backports_1.1.5
## [10] reprex_0.3.0 rlang_0.4.5 GA_3.2
## [13] XVector_0.26.0 ROCR_1.0-7 readxl_1.3.1
## [16] irlba_2.3.3 limma_3.42.2 dyndimred_1.0.3.9000
## [19] bit64_0.9-7 glue_1.3.2 pheatmap_1.0.12
## [22] sctransform_0.2.1 processx_3.4.2 vipor_0.4.5
## [25] dynutils_1.0.5 haven_2.2.0 tidyselect_1.0.0
## [28] lmds_0.1.0 fitdistrplus_1.0-14 XML_3.99-0.3
## [31] zoo_1.8-7 xtable_1.8-4 rje_1.10.15
## [34] evaluate_0.14 bibtex_0.4.2.2 Rdpack_0.11-1
## [37] cli_2.0.2 zlibbioc_1.32.0 sn_1.5-5
## [40] rstudioapi_0.11 rpart_4.1-15 lambda.r_1.2.4
## [43] shiny_1.4.0.2 BiocSingular_1.2.2 xfun_0.12
## [46] multtest_2.42.0 caTools_1.18.0 tidygraph_1.1.2
## [49] TSP_1.1-9 pcaMethods_1.78.0 ggrepel_0.8.2
## [52] ape_5.3 listenv_0.8.0 png_0.1-7
## [55] future_1.16.0 withr_2.1.2 bitops_1.0-6
## [58] ggforce_0.3.1 ranger_0.12.1 plyr_1.8.6
## [61] cellranger_1.1.0 dqrng_0.2.1 pillar_1.4.3
## [64] RcppParallel_5.0.0 gplots_3.0.3 multcomp_1.4-12
## [67] fs_1.3.2 ellipsis_0.3.0 DelayedMatrixStats_1.8.0
## [70] vctrs_0.2.4 generics_0.0.2 metap_1.3
## [73] tools_3.6.3 foreign_0.8-75 beeswarm_0.2.3
## [76] munsell_0.5.0 tweenr_1.0.1 fastmap_1.0.1
## [79] compiler_3.6.3 httpuv_1.5.2 babelwhale_1.0.1
## [82] plotly_4.9.2 GenomeInfoDbData_1.2.2 edgeR_3.28.1
## [85] lattice_0.20-38 mutoss_0.1-12 later_1.0.0
## [88] jsonlite_1.6.1 scales_1.1.0 pbapply_1.4-2
## [91] genefilter_1.68.0 lazyeval_0.2.2 promises_1.1.0
## [94] latticeExtra_0.6-29 reticulate_1.13 checkmate_2.0.0
## [97] rmarkdown_2.1 sandwich_2.5-1 webshot_0.5.2
## [100] statmod_1.4.34 Rtsne_0.15 uwot_0.1.5
## [103] igraph_1.2.4.2 proxyC_0.1.5 survival_3.1-8
## [106] numDeriv_2016.8-1.1 yaml_2.2.1 plotrix_3.7-7
## [109] carrier_0.1.0 htmltools_0.4.0 memoise_1.1.0
## [112] locfit_1.5-9.1 graphlayouts_0.6.0 viridisLite_0.3.0
## [115] digest_0.6.25 assertthat_0.2.1 mime_0.9
## [118] futile.options_1.0.1 npsurv_0.4-0 dynparam_1.0.0
## [121] RSQLite_2.2.0 future.apply_1.4.0 lsei_1.2-0
## [124] remotes_2.1.1 data.table_1.12.8 blob_1.2.1
## [127] labeling_0.3 ggsci_2.9 Formula_1.2-3
## [130] RCurl_1.98-1.1 SMVar_1.3.3 broom_0.5.5
## [133] hms_0.5.3 modelr_0.1.6 colorspace_1.4-1
## [136] base64enc_0.1-3 mnormt_1.5-6 ggbeeswarm_0.6.0
## [139] nnet_7.3-12 Rcpp_1.0.3 mclust_5.4.5
## [142] RANN_2.6.1 mvtnorm_1.1-0 fansi_0.4.1
## [145] R6_2.4.1 ggridges_0.5.2 lifecycle_0.2.0
## [148] acepack_1.4.1 formatR_1.7 TFisher_0.2.0
## [151] ggsignif_0.6.0 gdata_2.18.0 leiden_0.3.3
## [154] testthat_2.3.2 RcppAnnoy_0.0.16 TH.data_1.0-10
## [157] iterators_1.0.12 htmlwidgets_1.5.1 polyclip_1.10-0
## [160] rvest_0.3.5 mgcv_1.8-31 globals_0.12.5
## [163] htmlTable_1.13.3 patchwork_1.0.0.9000 codetools_0.2-16
## [166] lubridate_1.7.4 gtools_3.8.1 dbplyr_1.4.2
## [169] RSpectra_0.16-0 gtable_0.3.0 tsne_0.1-3
## [172] DBI_1.1.0 httr_1.4.1 KernSmooth_2.23-16
## [175] stringi_1.4.6 reshape2_1.4.3 farver_2.0.3
## [178] annotate_1.64.0 viridis_0.5.1 xml2_1.2.5
## [181] BiocNeighbors_1.4.2 geneplotter_1.64.0 bit_1.1-15.2
## [184] ggraph_2.0.1 pkgconfig_2.0.3 gbRd_0.4-11